From b2c35e3d13cc81475e405b41af6f09f075169aca Mon Sep 17 00:00:00 2001 From: "cl349@firebug.cl.cam.ac.uk" Date: Mon, 20 Feb 2006 14:46:14 +0000 Subject: [PATCH] Eliminate tss - tss is meaningless in a paravirtualized kernel and consumes 25% of the per-cpu area. Add CONFIG_X86_NO_TSS to exclude all code which references tss. Add CONFIG_X86_SYSENTER to conditionally include support for sysenter. Change CONFIG_DOUBLEFAULT to depend on !CONFIG_X86_NO_TSS. (sysenter and doublefault need tss). Based on a patch by Jan Beulich Signed-off-by: Christian Limpach --- linux-2.6-xen-sparse/arch/i386/Kconfig | 12 +++++++++++- linux-2.6-xen-sparse/arch/i386/kernel/Makefile | 9 ++++++--- .../arch/i386/kernel/asm-offsets.c | 2 ++ .../arch/i386/kernel/cpu/common-xen.c | 2 ++ linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S | 6 ++++++ .../arch/i386/kernel/init_task-xen.c | 2 ++ .../arch/i386/kernel/process-xen.c | 7 ++++--- linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c | 14 +++++++++----- linux-2.6-xen-sparse/arch/i386/kernel/vm86.c | 8 ++++++++ linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S | 2 ++ .../include/asm-i386/mach-xen/asm/desc.h | 2 ++ .../include/asm-i386/mach-xen/asm/processor.h | 12 +++++++++++- 12 files changed, 65 insertions(+), 13 deletions(-) diff --git a/linux-2.6-xen-sparse/arch/i386/Kconfig b/linux-2.6-xen-sparse/arch/i386/Kconfig index cfcdff0317..88e1e1b98e 100644 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig @@ -780,7 +780,7 @@ config HOTPLUG_CPU config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED - depends on !XEN + depends on !X86_NO_TSS help This option allows trapping of rare doublefault exceptions that would otherwise cause a system to silently reboot. 
Disabling this @@ -1176,6 +1176,16 @@ config X86_TRAMPOLINE depends on X86_SMP || (X86_VOYAGER && SMP) default y +config X86_NO_TSS + bool + depends on X86_XEN + default y + +config X86_SYSENTER + bool + depends on !X86_NO_TSS + default y + config KTIME_SCALAR bool default y diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile index 369c00bdd8..32b54bd7ea 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile +++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile @@ -49,11 +49,13 @@ else vsyscall_note := vsyscall-note.o endif +VSYSCALL_TYPES-y := int80 +VSYSCALL_TYPES-$(CONFIG_X86_SYSENTER) += sysenter # vsyscall.o contains the vsyscall DSO images as __initdata. # We must build both images before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so -targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) +$(obj)/vsyscall.o: $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.so) +targets += $(foreach F,$(VSYSCALL_TYPES-y),vsyscall-$F.o vsyscall-$F.so) targets += $(vsyscall_note) vsyscall.lds # The DSO images are built using a special linker script. 
@@ -81,7 +83,8 @@ $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o SYSCFLAGS_vsyscall-syms.o = -r $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ - $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE + $(foreach F,$(VSYSCALL_TYPES-y),$(obj)/vsyscall-$F.o) \ + $(obj)/$(vsyscall_note) FORCE $(call if_changed,syscall) ifdef CONFIG_XEN diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c b/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c index 36d66e2077..fa4f25dada 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/asm-offsets.c @@ -63,9 +63,11 @@ void foo(void) OFFSET(pbe_orig_address, pbe, orig_address); OFFSET(pbe_next, pbe, next); +#ifdef CONFIG_X86_SYSENTER /* Offset from the sysenter stack to tss.esp0 */ DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - sizeof(struct tss_struct)); +#endif DEFINE(PAGE_SIZE_asm, PAGE_SIZE); DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL)); diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c index 5d06b3a4a2..4d646fe16f 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/common-xen.c @@ -595,7 +595,9 @@ void __cpuinit cpu_gdt_init(struct Xgt_desc_struct *gdt_descr) void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); +#ifdef CONFIG_DOUBLEFAULT struct tss_struct * t = &per_cpu(init_tss, cpu); +#endif struct thread_struct *thread = &current->thread; if (cpu_test_and_set(cpu, cpu_initialized)) { diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S index d012631501..0df22a8675 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S +++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S @@ -223,6 +223,7 @@ need_resched: jmp need_resched #endif +#ifdef CONFIG_X86_SYSENTER /* SYSENTER_RETURN points to after the "sysenter" instruction in 
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -270,6 +271,7 @@ sysenter_past_esp: xorl %ebp,%ebp sti sysexit +#endif /* CONFIG_X86_SYSENTER */ # system call handler stub @@ -662,6 +664,7 @@ ENTRY(device_not_available) call math_state_restore jmp ret_from_exception +#ifdef CONFIG_X86_SYSENTER /* * Debug traps and NMI can happen at the one SYSENTER instruction * that sets up the real kernel stack. Check here, since we can't @@ -683,12 +686,15 @@ label: \ pushfl; \ pushl $__KERNEL_CS; \ pushl $sysenter_past_esp +#endif /* CONFIG_X86_SYSENTER */ KPROBE_ENTRY(debug) +#ifdef CONFIG_X86_SYSENTER cmpl $sysenter_entry,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) debug_stack_correct: +#endif /* CONFIG_X86_SYSENTER */ pushl $-1 # mark this as an int SAVE_ALL xorl %edx,%edx # error code 0 diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c index c020397501..c4da1cce82 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/init_task-xen.c @@ -41,9 +41,11 @@ struct task_struct init_task = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); +#ifndef CONFIG_X86_NO_TSS /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. 
*/ DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +#endif diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c index b193ece654..3f6e31b472 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/process-xen.c @@ -518,7 +518,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); +#ifndef CONFIG_X86_NO_TSS struct tss_struct *tss = &per_cpu(init_tss, cpu); +#endif physdev_op_t iopl_op, iobmp_op; multicall_entry_t _mcl[8], *mcl = _mcl; @@ -543,10 +545,9 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * Reload esp0. * This is load_esp0(tss, next) with a multicall. */ - tss->esp0 = next->esp0; mcl->op = __HYPERVISOR_stack_switch; - mcl->args[0] = tss->ss0; - mcl->args[1] = tss->esp0; + mcl->args[0] = __KERNEL_DS; + mcl->args[1] = next->esp0; mcl++; /* diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c index 0bada1870b..3bd61e7bf0 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c @@ -23,6 +23,7 @@ extern asmlinkage void sysenter_entry(void); void enable_sep_cpu(void) { +#ifdef CONFIG_X86_SYSENTER int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); @@ -37,6 +38,7 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); put_cpu(); +#endif } /* @@ -52,16 +54,18 @@ int __init sysenter_setup(void) __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY_EXEC); - if (!boot_cpu_has(X86_FEATURE_SEP)) { +#ifdef CONFIG_X86_SYSENTER + if (boot_cpu_has(X86_FEATURE_SEP)) { memcpy(page, - &vsyscall_int80_start, - &vsyscall_int80_end - 
&vsyscall_int80_start); + &vsyscall_sysenter_start, + &vsyscall_sysenter_end - &vsyscall_sysenter_start); return 0; } +#endif memcpy(page, - &vsyscall_sysenter_start, - &vsyscall_sysenter_end - &vsyscall_sysenter_start); + &vsyscall_int80_start, + &vsyscall_int80_end - &vsyscall_int80_start); return 0; } diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c index f51c894a7d..da2d48e178 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c @@ -97,7 +97,9 @@ struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) { +#ifndef CONFIG_X86_NO_TSS struct tss_struct *tss; +#endif struct pt_regs *ret; unsigned long tmp; @@ -122,7 +124,9 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) do_exit(SIGSEGV); } +#ifndef CONFIG_X86_NO_TSS tss = &per_cpu(init_tss, get_cpu()); +#endif current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; load_esp0(tss, &current->thread); @@ -251,7 +255,9 @@ out: static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) { +#ifndef CONFIG_X86_NO_TSS struct tss_struct *tss; +#endif /* * make sure the vm86() system call doesn't try to do anything silly */ @@ -295,7 +301,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk savesegment(fs, tsk->thread.saved_fs); savesegment(gs, tsk->thread.saved_gs); +#ifndef CONFIG_X86_NO_TSS tss = &per_cpu(init_tss, get_cpu()); +#endif tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S b/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S index b403890fe3..432aa46649 100644 --- a/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vsyscall.S @@ -7,9 +7,11 @@ 
vsyscall_int80_start: .incbin "arch/i386/kernel/vsyscall-int80.so" vsyscall_int80_end: +#ifdef CONFIG_X86_SYSENTER .globl vsyscall_sysenter_start, vsyscall_sysenter_end vsyscall_sysenter_start: .incbin "arch/i386/kernel/vsyscall-sysenter.so" vsyscall_sysenter_end: +#endif __FINIT diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h index e903b666c8..7d9ffc75d2 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/desc.h @@ -61,6 +61,7 @@ __asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ "rorl $16,%1" \ : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type)) +#ifndef CONFIG_X86_NO_TSS static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) { _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr, @@ -68,6 +69,7 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *ad } #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) +#endif static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) { diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h index 2b78658f1b..8ff4c15cb9 100644 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/processor.h @@ -91,8 +91,10 @@ struct cpuinfo_x86 { extern struct cpuinfo_x86 boot_cpu_data; extern struct cpuinfo_x86 new_cpu_data; +#ifndef CONFIG_X86_NO_TSS extern struct tss_struct doublefault_tss; DECLARE_PER_CPU(struct tss_struct, init_tss); +#endif #ifdef CONFIG_SMP extern struct cpuinfo_x86 cpu_data[]; @@ -343,7 +345,9 @@ extern int bootloader_type; #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +#ifndef CONFIG_X86_NO_TSS #define IO_BITMAP_OFFSET 
offsetof(struct tss_struct,io_bitmap) +#endif #define INVALID_IO_BITMAP_OFFSET 0x8000 #define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 @@ -401,6 +405,7 @@ typedef struct { struct thread_struct; +#ifndef CONFIG_X86_NO_TSS struct tss_struct { unsigned short back_link,__blh; unsigned long esp0; @@ -446,6 +451,7 @@ struct tss_struct { */ unsigned long stack[64]; } __attribute__((packed)); +#endif #define ARCH_MIN_TASKALIGN 16 @@ -482,6 +488,7 @@ struct thread_struct { .io_bitmap_ptr = NULL, \ } +#ifndef CONFIG_X86_NO_TSS /* * Note that the .io_bitmap member must be extra-big. This is because * the CPU will access an additional byte beyond the end of the IO @@ -504,8 +511,11 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa tss->ss1 = thread->sysenter_cs; wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); } - HYPERVISOR_stack_switch(tss->ss0, tss->esp0); } +#else +#define load_esp0(tss, thread) \ + HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0) +#endif #define start_thread(regs, new_eip, new_esp) do { \ __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ -- 2.30.2